package edu.utexas.cs.tamerProject.envModels.transModels;

import java.util.Random;

import org.rlcommunity.rlglue.codec.types.Action;
import org.rlcommunity.rlglue.codec.types.Observation;

import edu.utexas.cs.tamerProject.envModels.EnvTransModel;
import edu.utexas.cs.tamerProject.utils.encapsulation.ObsAndTerm;

public class MountainCarTransModel extends EnvTransModel {
	
	/*
	 * The parameters below MUST match those in the environmental code.
	 */
    public double minPosition = -1.2;
    public double maxPosition = 0.6;
    public double minVelocity = -0.07;
    public double maxVelocity = 0.07;
    
    public double startRegionLftBorder = -0.7;
    public double startRegionRtBorder = -0.3;
    
    public static double goalPosition = 0.5;
    public double accelerationFactor = 0.001;
    public double gravityFactor = -0.0025;
    public double hillPeakFrequency = 3.0;
    
    //This is the middle of the valley (no slope)                                                                     
    public double defaultInitPosition = -0.5d;
    public double defaultInitVelocity = 0.0d;

    static final int numActions = 3;
                                                                                                                                               
    private Random randomGenerator = new Random();
    private static final long randomSeed = 140823235235345435L;
    private Random seededRdmGen = new Random(randomSeed);
    private int epCounter = 0;
    private static final int epsUntilRepeat = 4000;
    private static final boolean repeatInitState = false;

    private int maxSteps = 400;
    private int completedStepsThisEp;
    
    public boolean randomStarts = true; // In MC code, this is set in MountainCar.getDefaultParameters()

    
    
    public ObsAndTerm getStartObs(){

    	double position;
    	double velocity;
        if (randomStarts) {
            do {
                double randStartPosition;
                if (repeatInitState) {
                    if (epCounter % epsUntilRepeat == 0) {
                        seededRdmGen = new Random(randomSeed);
                    }
                    randStartPosition = (seededRdmGen.nextDouble() * (startRegionRtBorder + Math.abs((startRegionLftBorder)))
                                         - Math.abs(startRegionLftBorder));
                }
                else {
                    randStartPosition = (randomGenerator.nextDouble() * (startRegionRtBorder + Math.abs((startRegionLftBorder)))
                                         - Math.abs(startRegionLftBorder));
                }
                position = randStartPosition;
            } while (position >= goalPosition);
        }
        else {
            position = defaultInitPosition;
        }
        velocity = defaultInitVelocity;
        
        double[] obsDoubleArray = {position, velocity};
        Observation obs = new Observation();
        obs.doubleArray = obsDoubleArray;
        
        return new ObsAndTerm(obs, false);
    }

    
    
    /**
     * Mountain Car is deterministic in the version used by this code, so sampling simply 
     *  returns the deterministically caused next observation.
     */
	public ObsAndTerm sampleNextObsNoForceCont(Observation obs, Action act) {
		int actNum = act.intArray[0];
		double position = obs.doubleArray[0];
		double velocity = obs.doubleArray[1];

        double variedAccel = accelerationFactor;

        /*
         * Update velocity
         */
        velocity += ((actNum - 1)) * variedAccel + getSlope(position) * (gravityFactor);
        if (velocity > maxVelocity) {
            velocity = maxVelocity;
        }
        if (velocity < minVelocity) {
            velocity = minVelocity;
        }
        
        /*
         * Update position
         */
        position += velocity;
        if (position > maxPosition) {
            position = maxPosition;
        }
        if (position < minPosition) {
            position = minPosition;
        }
        if (position == minPosition && velocity < 0) {
            velocity = 0;
        }

        /*
         * Check for terminal state
         */
        if (position >= goalPosition)
        	return new ObsAndTerm(null, true);
        
		Observation newObs = new Observation();
		double[] obsDoubleArray = {position, velocity};
		newObs.doubleArray = obsDoubleArray;
		return new ObsAndTerm(newObs, false);
	}
	
	/**
	 * Get the slop of the hill at this position
	 * @param queryPosition
	 * @return
	 */
	    public double getSlope(double queryPosition) {
	        /*The curve is generated by cos(hillPeakFrequency(x-pi/2)) so the 
	         * pseudo-derivative is cos(hillPeakFrequency* x) 
	         */
	        return Math.cos(hillPeakFrequency * queryPosition);
	    }

}
